" Input: { (user) - force different user ID/email for demos (data_uoa) - program UOA to benchmark it (tags) - prune programs by tags (opencl, cpu, armcl, caffe, tensorflow ...) (species) - list of species (cmd_key) - prune by CMD key, otherwise try all (dataset_uoa) - prune by dataset UOA, otherwise try all (dataset_file) - prune by dataset filename, otherwise try all (library) - if !='', specify one or several lib UOAs to use (comma separated) (pause_if_fail) - if pipeline fails, ask to press Enter (useful to analyze which flags fail during compiler flag autotuning) (pause) - if 'yes', pause before compiling and running test (list_tests) - show all tests to be performed, but do not run them (dry_run) - if 'yes', prepare pipeline and resolve dependencies, but do not run it (testing) (skip_deps_cache) - if 'yes', do not cache deps (deps_cache) - name of cache for deps (in local:tmp:cache-deps-nntest-{cache_deps}) and reuse them for all tests (by tags) (NOT COMPLETELY FINISHED - not recorded at the end - TBD) (refresh_deps_cache) - if 'yes', clean entry with deps cache and start again (repetitions) - statistical repetitions (default=1), for now statistical analysis is not used (TBD) (mali_hwc) - if 'yes', dump MALI hardware counters (dvdt_prof) - if 'yes', use dvdt_prof to collect opencl stats (only for opencl programs) (flags) - pass flags for compiler compilation of tests (-O3 by default) (iterations) - autotuning iterations (-1 by default, i.e. all possible such as batch size) (custom_autotuning) - dict to customize autotuning (can be added via external file in cmd @some-name.json) (autotune_id) - get autotune/{autotune_id}.json from program entry to start autotuning (no_record) - if "yes", do not record experiments ("no" by default, i.e. experiments will be recorded) (record_uoa) - use this experiment UOA to record all data to (timestamp) - use this instead of timestamp (record_repo) - if !='', record to this repo (local by default) (resume) - if 'yes', continue interrupted experiment identified by `timestamp` (skip_output_validation) - skip validation of output (dangerous during auto-tuning - some optimizations may break semantics or change accuracy) (output_validation_repo) - output validation repo UOA (when recording new output) (overwrite_reference_output) - if 'yes', overwrite reference output (useful if broken) (update_platform_init) - update platform.init scripts (ask user) } Output: { return - return code = 0, if successful > 0, if error (error) - error text if return > 0 } ''' Run prepared pipeline. ''' assert self.prepared_pipeline # Tune dependencies # TODO: it looks like should be working but it does not. # program.pipeline resolves deps even thought they are already resolved, # and we loose this optimization of prepared dependencies. 
        if self.deps and self.skip_compilation:
            for dep in self.deps:
                if ('cus' in self.deps[dep]) and self.deps[dep]['cus'].get('dynamic_lib'):
                    self.deps[dep]['cus']['skip_copy_to_remote'] = 'yes'

            self.prepared_pipeline['dependencies'] = self.deps

        params_json = {
            'action': 'autotune',
            'module_uoa': 'pipeline',
            'data_uoa': 'program',

            'meta': self.__make_experiment_meta(),
            'tags': self.tags,

            'pipeline': self.prepared_pipeline,
            'features_keys_to_process': ['##choices#*'],

            'iterations': self.options.iterations,
            'repetitions': self.options.repetitions,

            'record': yes_no(self.options.record),
            'record_repo': self.config.exchange_repo,
            'record_experiment_repo': self.config.exchange_subrepo,
            'record_failed': 'yes',
            'record_dict': {'subview_uoa': cfg['data_deps']['experiment.view.nntest']},
            'record_params': {'search_point_by_features': 'yes'},
            'record_uoa': self.record.uoa,

            'pause': yes_no(self.options.pause),
            'pause_if_fail': yes_no(self.options.pause_if_fail),

            'skip_stat_analysis': yes_no(self.dvdt_prof),  # too much raw statistics

            'out': 'con'
        }

        self.__apply_autotuning_params(params_json)

        # Start benchmarking or autotuning
        r = ck_access(params_json)

        if r.get('fail') == 'yes':
            reason = r.get('fail_reason') or 'unknown reason'
            CKException.throw('autotuning failed (%s)' % reason, code=10)

    def __make_experiment_meta(self):
        '''
        Prepare experiment entry meta
        '''
        meta = {
            'timestamp': self.config.timestamp,
            'stimestamp': self.config.stimestamp,
            'user': self.config.user,
            'nntest_ver': cfg['version'],

            'scenario_module_uoa': work['self_module_uid'],

            'host_os_uid': self.platform.host_os_uid,
            'target_os_uid': self.platform.target_os_uid,
            'target_device_id': self.platform.device_id,

            'cpu_name': self.platform.cpu_name,
            'cpu_abi': self.platform.cpu_abi,
            'cpu_uid': self.platform.cpu_uid,

            'os_name': self.platform.os_name,
            'os_uid': self.platform.os_uid,

            'plat_name': self.platform.name,
            'plat_uid': self.platform.uid,

            'gpu_name': self.platform.gpu_name,
            'gpgpu_name': self.platform.gpgpu_name2,
            'gpgpu_vendor': self.platform.gpgpu_vendor,
            'opencl': self.platform.opencl_version,

            'prog_uoa': self.program.uoa,
            'prog_uid': self.program.uid,
            'prog_type': self.program.type,
            'species': self.program.species_uoas_str,

            'cmd_key': self.command.key,

            'dataset_uoa': self.dataset.uoa,
            'dataset_uid': self.dataset.uid,
            'dataset_file': self.dataset_file,

            'versions': self.__get_deps_versions()
        }

        # Add hostname if required
        if ck.cfg.get('record_nntest_hostname', '') == 'yes':
            import platform
            meta['platform_hostname'] = platform.node()

        return meta

    def __apply_autotuning_params(self, target_json):
        # Check if program meta has global autotuning
        if self.program.autotuning:
            target_json.update(self.program.autotuning)

        # Check if program meta has autotuning for a given command line
        if self.command.autotuning:
            target_json.update(self.command.autotuning)

        # Check if autotune_id
        autotuning = self.program.get_autotuning_from_file(self.autotune_id)
        if autotuning:
            target_json.update(autotuning)

        # Check if external autotuning is defined
        if self.options.custom_autotuning:
            target_json.update(self.options.custom_autotuning)

    def __format_batch_sizes(self):
        autotuning = self.program.get_autotuning_from_file(self.autotune_id)

        batch_size_choice_order = -1
        for order, param in enumerate(autotuning.get('choices_order', [])):
            if param and 'CK_IN_SHAPE_N' in param[0]:
                batch_size_choice_order = order
                break

        if batch_size_choice_order >= 0:
            choices_selection = autotuning.get('choices_selection', [])
            if batch_size_choice_order < len(choices_selection):
                choice = choices_selection[batch_size_choice_order]
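                # Illustrative only (hypothetical numbers): for a 'loop' choice such as
                #   {'type': 'loop', 'start': 1, 'stop': 16, 'step': 1}
                # the code below enumerates the batch sizes '1,2,...,16';
                # with 'step': 4 it would yield '1,5,9,13'.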
                # A missing 'step' would make range() fail, so default to 1
                batch_sizes = range(choice.get('start', 0), choice.get('stop', 0) + 1, choice.get('step', 1))
                batch_sizes = [str(bs) for bs in batch_sizes]

                if self.options.iterations > -1 and self.options.iterations < len(batch_sizes):
                    batch_sizes = batch_sizes[:self.options.iterations]

                self.batches_info = ','.join(batch_sizes)

                # TODO: currently we only support the `loop` type of batch choices selection
                if choice.get('type', '') != 'loop':
                    self.batches_info += ' (nonstandard autotuning: number of iterations may be estimated incorrectly)'

                return self.batches_info

        return ''

    def __get_deps_versions(self):
        r = ck_access({'action': 'get_all_versions_in_deps',
                       'module_uoa': cfg['module_deps']['env'],
                       'deps': self.deps})
        return r['versions']

    def print_report(self):
        ck.out('- Program: {} ({})'.format(self.program.uoa, self.program.uid))
        ck.out('- Library: {} {} ({})'.format(self.library.data_name, self.library.version, self.library.data_uoa))

        # `compile_deps` is not yet resolved at the `--list_tests` stage
        if self.compile_deps:
            compiler = self.compile_deps.get('compiler', {})
            ck.out('- Compiler: {} v{} ({})'.format(compiler.get('dict', {}).get('data_name', '?'),
                                                    compiler.get('ver', '?'),
                                                    compiler.get('uoa', '?')))

        ck.out('- Shape: dataset:{}:{}'.format(self.dataset.uoa, self.dataset_file))
        ck.out('- Autotune ID: {}'.format(self.autotune_id))
        ck.out('- Batch size(s): {}'.format(self.batches_info))

        # experiment recording can be suppressed with `--no_record`
        if self.record.cid:
            ck.out('- Repo: {}'.format(self.record.cid))

        ck.out('- Tags: {}'.format(self.tags))


def crowdsource(i):
    """